package com.nosite.batch.server.util;

import lombok.extern.slf4j.Slf4j;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;

/**
 * @Auther: SunYuGuo
 * @Date: 2019/11/17 22:27
 * @Description:
 */
@Component
@Slf4j
public class ReptileData {

    public Elements reptileAdapter(String keywords,String url) {
        Elements result = new Elements();
        //动态模拟请求数据
        CloseableHttpClient httpclient = HttpClients.createDefault();
        HttpGet httpGet = new HttpGet(url+keywords);
        //模拟浏览器浏览（user-agent的值可以通过浏览器浏览，查看发出请求的头文件获取）
        httpGet.setHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36");
        CloseableHttpResponse response = null;
        try {
            response = httpclient.execute(httpGet);
            //获取响应状态码
            int statusCode = response.getStatusLine().getStatusCode();

            HttpEntity entity = response.getEntity();
            //如果状态响应码为200，则获取html实体内容或者json文件
            if (statusCode == 200) {
                String html = EntityUtils.toString(entity, Consts.UTF_8);
                //提取HTML得到商品信息结果
                Document doc = null;
                // doc获取整个页面的所有数据
                doc = Jsoup.parse(html);
                // 通过浏览器查看商品页面的源代码，找到信息所在的div标签，再对其进行一步一步地解析
                Elements ulList = doc.select("div[class='view grid-nosku']");
                result = ulList.select("div[class='product']");
           }
        } catch (Exception e) {
            log.error("爬虫发生异常",e);
        } finally {
            try {
                response.close();
            } catch (IOException e) {
            }
        }
        return result;
    }
}
